/**********************************************************************
gc_8_structural_1st_stage.do

**********************************************************************/
**********
* SET UP *
**********
clear all
set matsize 2000
set more 1

* location for dofiles *
cd "T:\_Projet_4915\dofiles"
global dir "`c(pwd)'"
cd $dir

sysdir set PERSONAL "T:\_Projet_4915\ado"

* specify file locations *
global project_folder "\_Projet_4915"
global data_folder "\_Projet_4915\DATA"
global output_folder "\_Projet_4915\ResultsFolder"
global temp "temp"

local datadir T:\${data_folder}\

*********
* GATES *
*********
* Specify which data you want to work with (synthetic = syn, real = rl) *
local ext = "rl"

* Specify which gender to process (men = men) *
local gender = "men"

* Specify first observed age (24 25 26) *
local a = 25

* Specify occupational definition to use (occ_version 1 2)
local v = 2

* Specify firm class definition (1 2 3 4 5 6)
local d = 6

* Specify the number of unobservable types *
global types  "3 5"

* Specify earnings data generating process *
local y_dgp = 3

* STEP 1: initialize dataset for analysis *
local gate1 = 1

* STEP 2: structural estimation - first stage *
local gate2 = 1

* STEP X: delete all intermediate datasets *
local gateX = 1

*******************************************
* MAKE A GLOBAL VARIABLE FOR TODAY'S DATE *
*******************************************
local tyr = substr("$S_DATE",8,4)
local tmo = substr("$S_DATE",4,3)
local tmd = trim(substr("$S_DATE",1,2))

* make day of month two digits *
local wl = length("`tmd'")
if `wl'==1 {
local tmd2 ="0"+"`tmd'"
}
if `wl'!=1 {
local tmd2 ="`tmd'"
}

* get numeric month, make it two digits *
local di="`tmd2'"+"`tmo'"+"`tyr'"
local edate = date("`di'", "DMY")
local mono = month(`edate')
local ml = length("`mono'")
if `ml'==1 {
local mono2 ="0"+"`mono'"
}
if `ml'!=1 {
local mono2 ="`mono'"
}

* put final date together *
global date = "`tyr'"+"`mono2'"+"`tmd2'"

****************************************
* DEFINE LITTLE PROGRAMS TO PRINT TIME *
****************************************
program define starttime
	display "Started processing at $S_TIME on $S_DATE"
end

program define endtime
	display "Finished processing at $S_TIME on $S_DATE"
end

************************
* START OF THE PROGRAM *
************************

foreach n_types of global types ///
{
	
* start log file *
quietly capture log close
quietly log using gc_8_structural_1st_stage_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'_`ext', text replace

********************************************
* STEP 1: initialize datasets for analysis *
********************************************
disp "***** Started processing STEP 1 *****"
starttime
if 1 == `gate1' ///
{
	disp "***** STEP 1: initialize datasets for analysis *****"
	disp "***** Time is now $S_TIME *****"
		
	local datadir T:\${data_folder}\
	
	capture erase "`datadir'gc_analyze_me_tmp_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
	capture erase "`datadir'gc_structural_Qs_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
	
	use "`datadir'gc_analyze_me_`gender'_start`a'_occ_v`v'_`ext'.dta", clear

	*****************************************************************
	* construct a two digit categorical variable to denote action *
	*****************************************************************
	* first digit denotes occupation *
	* 1 = paid employee, 2 = self-employed, 3 = entrepreneur, 9 = unemployed *
	tostring(occ_v`v'), gen(first_digit)
	replace first_digit = "9" if occ_v`v' == 0
	
	* second digit denotes firm class *
	gen firm_class = 0
	
	if `d' == 1 ///
	{
		replace firm_class = 1 if f1_employment_4c == 1
		replace firm_class = 2 if f1_employment_4c == 2
		replace firm_class = 3 if f1_employment_4c == 3 | f1_employment_4c == 4
	}
	if `d' == 2 ///
	{
		replace firm_class = 1 if f2_employment_4c == 1
		replace firm_class = 2 if f2_employment_4c == 2
		replace firm_class = 3 if f2_employment_4c == 3 | f2_employment_4c == 4
	}
	if `d' == 3 ///
	{
		replace firm_class = 1 if f1_rev_perL_4c == 1
		replace firm_class = 2 if f1_rev_perL_4c == 2
		replace firm_class = 3 if f1_rev_perL_4c == 3 | f1_rev_perL_4c == 4
	}
	if `d' == 4 ///
	{
		replace firm_class = 1 if f2_rev_perL_4c == 1
		replace firm_class = 2 if f2_rev_perL_4c == 2
		replace firm_class = 3 if f2_rev_perL_4c == 3 | f2_rev_perL_4c == 4
	}
	if `d' == 5 ///
	{
		replace firm_class = 1 if f1_payroll_perL_4c == 1
		replace firm_class = 2 if f1_payroll_perL_4c == 2
		replace firm_class = 3 if f1_payroll_perL_4c == 3 | f1_payroll_perL_4c == 4
	}
	if `d' == 6 ///
	{
		replace firm_class = 1 if f2_payroll_perL_4c == 1
		replace firm_class = 2 if f2_payroll_perL_4c == 2
		replace firm_class = 3 if f2_payroll_perL_4c == 3 | f2_payroll_perL_4c == 4
	}
				
	tostring(firm_class), gen(second_digit)
	replace second_digit = "0" if occ_v`v' == 0
	replace second_digit = "0" if occ_v`v' == 2
	replace second_digit = "0" if occ_v`v' == 3
	gen action = first_digit + second_digit
	destring(action), replace
	drop second_digit
				
	*****************************************************************************
	* create categorical variable to denote action over the past 11 periods *
	*****************************************************************************
	capture drop period
	sort pid tax_yr
	by pid: gen period = _n
	tab period
	forvalues i = 1(1)11 ///
	{
		gen L`i'action = .
		by pid: replace L`i'action = action[_n - `i'] if period > `i'
	}
	
	***********************************
	* construct vectors of experience *
	***********************************
	* create a list of all possible actions *
	foreach act in ///
	"11" "12" "13" ///
	"20" "30" "90" ///
	{
		forvalues i = 1(1)11 ///
		{
			gen flag`i' = 0
			replace flag`i' = 1 if L`i'action == `act'
		}
		* construct vector of experience accumulated since first year *
		egen exper_`act' = rowtotal(flag*)
		drop flag*
	}

	* Set last action to L2action if last action is nonemployment and L2action != . *
	rename L1action last_action
	rename L2action lastlast_action
	replace last_action = lastlast_action if last_action == 90 & lastlast_action != .
	drop L*
	
	rename last_action Laction
	rename lastlast_action L2action
	
	* construct main outcome variable *
	gen y = y_v`v'
	gen ln_y = log(y)
	
	* construct numeric person ID *
	destring pid, gen(pid_int)

	* Set initial condition for Laction to action in the initial period *
	replace Laction = action if age == `a'
	
	* construct an initial condition variable *
	gen initial_tmp = .
	replace initial_tmp = Laction if age == `a'
	bys pid_int: egen action_entry = max(initial_tmp)
	drop initial_tmp
	
	rename res_pc_inc_entry inc_entry
	rename res_pc_busipopden_entry busiden_entry
	foreach var in inc_entry busiden_entry ///
	{
		gen `var'_cat = 0
		capture sum `var' if age == `a', detail
		display "`var' 75th pctile is `r(p75)'"
		replace `var'_cat = 1 if `var' >= `r(p75)'
	}
	
	* construct aggregate experience variables *
	gen exper_10 = exper_11 + exper_12 + exper_13
	replace age = age-`a'
	gen age_sq = age*age
	gen age_cu = age*age_sq
	
	* construct polynomials of experience *
	foreach var in ///
	"exper_10" ///
	"exper_11" "exper_12" "exper_13" ///
	"exper_20" "exper_30" "exper_90" ///
	{
		gen `var'_sq = `var'*`var'
	}
	
	sort pid tax_yr
	
	* keep key variables *
	keep pid pid_int tax_yr age* y ln_y action Laction exper* *entry*
	
	* order key variables *
	order pid pid_int tax_yr age* y ln_y action Laction exper* *entry*
	
	* drop last year of data (=2012). To be used to assess out of sample goodness of fit. *
	drop if tax_yr == 2012
	
	**********************
	* INITIAL GUESS ON q *
	**********************
	bys pid_int: egen mean_y = mean(y)
	replace mean_y = ln(mean_y)
	
	if `n_types' == 1 ///
	{
		gen q1 = 1
		gen sum_q = q1
		gen tmp_pc100 = 1
	}
	
	if `n_types' == 2 ///
	{
		egen tmp_pc50 = pctile(mean_y), p(50)
		gen q1 = .2
		gen q2 = .2
		replace q1 = .8 if mean_y <= tmp_pc50
		replace q2 = .8 if mean_y > tmp_pc50
		gen sum_q = q1+q2
	}
	
	if `n_types' == 3 ///
	{
		egen tmp_pc33 = pctile(mean_y), p(33)
		egen tmp_pc66 = pctile(mean_y), p(66)
		gen q1 = .15
		gen q2 = .15
		gen q3 = .15
		replace q1 = .7 if mean_y <= tmp_pc33
		replace q2 = .7 if mean_y > tmp_pc33 & mean_y <= tmp_pc66
		replace q3 = .7 if mean_y > tmp_pc66
		gen sum_q = q1+q2+q3
	}
	
	if `n_types' == 4 ///
	{
		egen tmp_pc25 = pctile(mean_y), p(25)
		egen tmp_pc50 = pctile(mean_y), p(50)
		egen tmp_pc75 = pctile(mean_y), p(75)
		gen q1 = .1
		gen q2 = .1
		gen q3 = .1
		gen q4 = .1
		replace q1 = .7 if mean_y <= tmp_pc25
		replace q2 = .7 if mean_y > tmp_pc25 & mean_y <= tmp_pc50
		replace q3 = .7 if mean_y > tmp_pc50 & mean_y <= tmp_pc75
		replace q4 = .7 if mean_y > tmp_pc75
		gen sum_q = q1+q2+q3+q4
	}
	
	if `n_types' == 5 ///
	{
		egen tmp_pc20 = pctile(mean_y), p(20)
		egen tmp_pc40 = pctile(mean_y), p(40)
		egen tmp_pc60 = pctile(mean_y), p(60)
		egen tmp_pc80 = pctile(mean_y), p(80)
		gen q1 = .1
		gen q2 = .1
		gen q3 = .1
		gen q4 = .1
		gen q5 = .1
		replace q1 = .6 if mean_y <= tmp_pc20
		replace q2 = .6 if mean_y > tmp_pc20 & mean_y <= tmp_pc40
		replace q3 = .6 if mean_y > tmp_pc40 & mean_y <= tmp_pc60
		replace q4 = .6 if mean_y > tmp_pc60 & mean_y <= tmp_pc80
		replace q5 = .6 if mean_y > tmp_pc80
		gen sum_q = q1+q2+q3+q4+q5
	}
	
	if `n_types' == 6 ///
	{
		egen tmp_pc17 = pctile(mean_y), p(17)
		egen tmp_pc33 = pctile(mean_y), p(33)
		egen tmp_pc50 = pctile(mean_y), p(50)
		egen tmp_pc67 = pctile(mean_y), p(67)
		egen tmp_pc83 = pctile(mean_y), p(83)
		gen q1 = .1
		gen q2 = .1
		gen q3 = .1
		gen q4 = .1
		gen q5 = .1
		gen q6 = .1
		replace q1 = .5 if mean_y <= tmp_pc17
		replace q2 = .5 if mean_y > tmp_pc17 & mean_y <= tmp_pc33
		replace q3 = .5 if mean_y > tmp_pc33 & mean_y <= tmp_pc50
		replace q4 = .5 if mean_y > tmp_pc50 & mean_y <= tmp_pc67
		replace q5 = .5 if mean_y > tmp_pc67 & mean_y <= tmp_pc83
		replace q6 = .5 if mean_y > tmp_pc83
		gen sum_q = q1+q2+q3+q4+q5+q6
	}
	sum sum_q, detail
	drop sum_q
	drop mean_y y tmp_pc*
	reshape long q, i(pid_int tax_yr) j(state)
	
	preserve
	drop q
	compress
	save "`datadir'gc_analyze_me_tmp_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", replace
	restore

	keep pid_int state q
	rename state state_index
	sort pid_int state_index
	duplicates drop

	compress
	save "`datadir'gc_structural_Qs_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", replace
	clear
	
	disp "***** Time is now $S_TIME *****"
	disp "***** STEP 1: initialize datasets for analysis (COMPLETED) *****"
}
disp "***** Finished processing STEP 1 *****"
endtime

*************************************************
* STEP 2: structural estimation - first stage   *
*************************************************
disp "***** Started processing STEP 2 *****"
starttime
if 1 == `gate2' ///
{
	disp "***** STEP 2: structural estimation - first stage *****"
	local datadir T:\${data_folder}\
	
	confirm file "`datadir'gc_analyze_me_tmp_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
	confirm file "`datadir'gc_structural_Qs_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
	
	local EM_loop = 1
	local converged = 0
	while `converged' < 1 ///
	{
		disp "***** This is loop number `EM_loop' of the EM algorithm *****"
		disp "***** Time is now $S_TIME *****"
		
		***************************************************
		* STEP 1: OBTAIN CONDITIONAL CHOICE PROBABILITIES *
		***************************************************
		disp "***** STEP 2.1: OBTAIN CONDITIONAL CHOICE PROBABILITIES *****"
		disp "***** Time is now $S_TIME *****"

		local datadir T:\${data_folder}\
		use "`datadir'gc_analyze_me_tmp_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", clear
		rename state state_index
		merge m:1 pid_int state_index using "`datadir'gc_structural_Qs_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
		drop if _merge == 2
		drop _merge
		rename state_index state
		order pid pid_int state tax_yr ln_y action Laction *entry*

		sort pid_int tax_yr state
		
		* Create action dummy variables and CCPs *
		foreach act in ///
		"11" "12" "13" ///
		"20" "30" "90" ///
		{
			gen action_`act' = 0
			replace action_`act' = 1 if action == `act'
			gen CCP`act' = .
		}
		
		* Run linear probability models locally for each Laction. *
		local counter = 1
		
		**************************************************
		* Variables included in linear probability model *
		**************************************************
		global coeffs_CCP "exper_11 exper_12 exper_13 exper_10_sq exper_20 exper_20_sq exper_30 exper_30_sq age age_sq"
		
		* Loop through state *
		forvalues s = 1(1)`n_types' ///
		{
			* Loop through Laction *
			foreach Lact in ///
			"11" "12" "13" ///
			"20" "30" "90" ///
			{
				* Loop through action dummies *
				foreach act in ///
				"11" "12" "13" ///
				"20" "30" "90" ///
				{
					disp "***** Now running linear probability model locally for: CCP action = `act', Laction = `Lact', and type = `s' *****"
					disp "***** Iteration `counter', time is now $S_TIME *****"
					local datadir T:\${data_folder}\
					estimates clear
					capture reg action_`act' ${coeffs_CCP} if Laction == `Lact' & state == `s' [aw=q]
					local datadir T:\${data_folder}\gc_ster\
					estimates save "`datadir'gc_structural_CCP`act'_L`Lact'_s`s'_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.ster", replace
					predict tmpCCP`act'_`Lact'_`s' if Laction == `Lact' & state == `s'
					replace CCP`act' = tmpCCP`act'_`Lact'_`s' if Laction == `Lact' & state == `s'
					* Winsorize CCPs *
					replace CCP`act' = 0.0000001 if CCP`act' <= 0.0000001 & Laction == `Lact' & state == `s'
					replace CCP`act' = 0.9999999 if CCP`act' >= 0.9999999 & CCP`act' != . & Laction == `Lact' & state == `s'
					drop tmp*
					local counter = `counter' + 1
				}
			}
		}
		
		keep state Laction age ///
		exper_11 exper_12 exper_13 ///
		exper_20 exper_30 ///
		CCP*
		
		order state Laction age ///
		exper_11 exper_12 exper_13 ///
		exper_20 exper_30 ///
		CCP*
		
		foreach var in "state" "Laction" "age" ///
		"exper_11" "exper_12" "exper_13" ///
		"exper_20" "exper_30" ///
		{
			rename `var' `var'_index
		}
		duplicates drop
		compress
		local datadir T:\${data_folder}\
		save "`datadir'gc_structural_CCPs_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", replace
		clear

		disp "***** Time is now $S_TIME *****"
		disp "***** STEP 2.1: OBTAIN CONDITIONAL CHOICE PROBABILITIES (COMPLETED) *****"
		
		************************************************************
		* STEP 2: OBTAIN EXPECTED INCOME AND ASSOCIATED PARAMETERS *
		************************************************************
		disp "***** STEP 2.2: OBTAIN EXPECTED INCOME AND ASSOCIATED PARAMETERS *****"
		disp "***** Time is now $S_TIME *****"

		local datadir T:\${data_folder}\
		use "`datadir'gc_analyze_me_tmp_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", clear
		rename state state_index
		merge m:1 pid_int state_index using "`datadir'gc_structural_Qs_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
		drop if _merge == 2
		drop _merge
		rename state_index state
		order pid pid_int state tax_yr ln_y action Laction *entry*
		sort pid_int tax_yr state
		
		******************************************
		* Variables included in payoff functions *
		******************************************
		global coeffs_y_m1 "exper_11 exper_12 exper_13 exper_10_sq exper_20 exper_20_sq exper_30 exper_30_sq"
		global coeffs_y_m2 "exper_11 exper_12 exper_13 exper_10_sq exper_20 exper_20_sq exper_30 exper_30_sq i.state"
		global coeffs_y_m3 "c.exper_11#i.state c.exper_12#i.state c.exper_13#i.state c.exper_10_sq#i.state c.exper_20#i.state c.exper_20_sq#i.state c.exper_30#i.state c.exper_30_sq#i.state i.state"
		foreach act in ///
		"11" "12" "13" ///
		"20" "30" ///
		{
			disp "***** Now estimating parameters of the payoff function for action: `act' *****"
			disp "***** Time is now $S_TIME *****"
			local datadir T:\${data_folder}\
			estimates clear
			reg ln_y ${coeffs_y_m`y_dgp'} if action == `act' [aw=q], cluster(pid_int)
			local datadir T:\${data_folder}\gc_ster\
			estimates save "`datadir'gc_structural_y`act'_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.ster", replace
			predict ln_y_hat_`act'
			estimates store m_`y_dgp'
			estadd ysumm
			local datadir \${output_folder}\
			estout m_`y_dgp' using "`datadir'gc_structural_y`act'_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.txt", replace ///
			varwidth(12) cells(b(star fmt(%9.3f)) ///
			se(par(`"="("' `")""') fmt(%9.3f))) ///
			starlevels(* 0.1 ** 0.05 *** 0.01) ///
			stats(ymean r2_a N_clust N, /// 
			fmt(%9.2f %9.2f %9.0g %9.0g) ///
			labels("Mean, Dependent Variable" "adjusted R-squared" "Number of individuals" "Number of obs."))
		}

		keep state age ///
		exper_11 exper_12 exper_13 ///
		exper_20 exper_30 ///
		ln_y_hat*
		
		order state age ///
		exper_11 exper_12 exper_13 ///
		exper_20 exper_30 ///
		ln_y_hat*
		
		foreach var in "state" "age" ///
		"exper_11" "exper_12" "exper_13" ///
		"exper_20" "exper_30" ///
		{
			rename `var' `var'_index
		}
		duplicates drop
		compress
		local datadir T:\${data_folder}\
		save "`datadir'gc_structural_ln_y_hats_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", replace
		clear

		disp "***** Time is now $S_TIME *****"
		disp "***** STEP 2.2: OBTAIN EXPECTED INCOME AND ASSOCIATED PARAMETERS (COMPLETED) *****"

		*************************************************
		* STEP 3: CALCULATE UNCONDITIONAL PROBABILITIES *
		*************************************************
		disp "***** STEP 2.3: CALCULATE UNCONDITIONAL PROBABILITIES *****"
		disp "***** Time is now $S_TIME *****"

		local datadir T:\${data_folder}\
		use "`datadir'gc_analyze_me_tmp_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", clear
		rename state state_index
		merge m:1 pid_int state_index using "`datadir'gc_structural_Qs_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
		drop if _merge == 2
		drop _merge
		rename state_index state
		order pid pid_int state tax_yr ln_y action Laction *entry*
		sort pid_int tax_yr state
		
		* Calculate unconditional probabilities / initial condition *
		egen Pi = mean(q), by(state action_entry res_city_size_entry inc_entry_cat busiden_entry_cat)

		keep state action_entry res_city_size_entry inc_entry_cat busiden_entry_cat Pi
		order state action_entry res_city_size_entry inc_entry_cat busiden_entry_cat Pi

		rename state state_index
		rename action_entry action_entry_index
		rename res_city_size_entry res_city_size_entry_index
		rename inc_entry_cat  inc_entry_cat_index
		rename busiden_entry_cat busiden_entry_cat_index
		duplicates drop
		compress
		save "`datadir'gc_structural_Pi_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", replace
		
		disp "***** Time is now $S_TIME *****"
		disp "***** STEP 2.3: CALCULATE UNCONDITIONAL PROBABILITIES (COMPLETED) *****"

		**************************************************
		* STEP 4: CALCULATE DENSITY OF WAGE DISTRIBUTION *
		**************************************************
		disp "***** STEP 2.4: CALCULATE DENSITY OF WAGE DISTRIBUTION *****"
		disp "***** Time is now $S_TIME *****"
		
		local datadir T:\${data_folder}\
		use "`datadir'gc_analyze_me_tmp_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", clear
		
		foreach var in "state" "age" ///
		"exper_11" "exper_12" "exper_13" ///
		"exper_20" "exper_30" ///
		{
			rename `var' `var'_index
		}
		
		merge m:1 state_index age_index ///
		exper_11_index exper_12_index exper_13_index ///
		exper_20_index exper_30_index ///
		using "`datadir'gc_structural_ln_y_hats_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
		drop if _merge == 2
		drop _merge
		order pid pid_int state_index tax_yr ln_y action Laction *entry*
		
		foreach var in "state" "age" ///
		"exper_11" "exper_12" "exper_13" ///
		"exper_20" "exper_30" ///
		{
			rename `var'_index `var'
		}
		
		gen ln_y_hat = .
		foreach act in ///
		"11" "12" "13" ///
		"20" "30" ///
		{
			replace ln_y_hat = ln_y_hat_`act' if action == `act'
		}

		sort pid_int tax_yr state
		egen sigma = sd(ln_y), by(action)
		gen z = (ln_y-ln_y_hat)/sigma
		gen double probY = normalden(z)
		drop ln_y_hat*
		replace probY = 1 if action == 90
		keep pid_int state tax_yr probY
		order pid_int state tax_yr probY
		rename state state_index
		rename tax_yr tax_yr_index
		compress
		save "`datadir'gc_structural_probY_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", replace
		clear

		disp "***** Time is now $S_TIME *****"
		disp "***** STEP 2.4: CALCULATE DENSITY OF WAGE DISTRIBUTION (COMPLETED) *****"
		
		****************************************
		* STEP 5: CALCULATE THE LOG LIKELIHOOD *
		****************************************
		disp "***** STEP 2.5: CALCULATE THE LOG LIKELIHOOD *****"
		disp "***** Time is now $S_TIME *****"
		
		local datadir T:\${data_folder}\
		use "`datadir'gc_analyze_me_tmp_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", clear
		
		* Count number of individuals in estimation sample *
		count if age == 0 & state == 1
		local N = `r(N)'
		display "There are `N' individuals in the data."
		
		rename state state_index
		rename tax_yr tax_yr_index
		merge 1:1 pid_int state_index tax_yr_index using "`datadir'gc_structural_probY_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
		drop if _merge == 2
		drop _merge
		
		keep pid pid_int state_index tax_yr_index ln_y action Laction age *entry* ///
		exper_11 exper_12 exper_13 ///
		exper_20 exper_30 ///
		probY

		foreach var in "Laction" "age" ///
		"action_entry" "res_city_size_entry" "inc_entry_cat" "busiden_entry_cat" ///
		"exper_11" "exper_12" "exper_13" ///
		"exper_20" "exper_30" ///
		{
			rename `var' `var'_index
		}
		
		merge m:1 state_index action_entry_index res_city_size_entry_index inc_entry_cat_index busiden_entry_cat_index using "`datadir'gc_structural_Pi_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
		drop if _merge == 2
		drop _merge
		
		merge m:1 state_index Laction_index age_index ///
		exper_11_index exper_12_index exper_13_index ///
		exper_20_index exper_30_index ///
		using "`datadir'gc_structural_CCPs_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
		drop if _merge == 2
		drop _merge
		
		gen aCCP = .
		foreach act in ///
		"11" "12" "13" ///
		"20" "30" "90" ///
		{
			replace aCCP = CCP`act' if action == `act'
		}
		drop CCP*
		
		merge m:1 pid_int state_index using "`datadir'gc_structural_Qs_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
		drop if _merge == 2
		drop _merge

		keep pid_int state_index tax_yr_index Pi probY aCCP q
		order pid_int state_index tax_yr_index Pi probY aCCP q
		rename tax_yr_index tax_yr
		rename state_index state
		sort pid_int state tax_yr
		order pid_int state tax_yr
		gen ll1 = q*log(aCCP)
		gen ll2 = q*log(probY)
		gen ll3 = q*log(Pi)
		gen ll4 = -1*q*log(q)
		
		* calculate the sum of ll1 and the sum of ll2 over all years by individual - state *
		collapse (sum) ll1 ll2 (mean) ll3 ll4, by(pid_int state)
		
		* calculate the sum of ll1, ll2, ll3, and ll4 over all states and over all individuals *
		collapse (sum) ll1 ll2 ll3 ll4
		
		* compute log likelihood *
		gen double loglikelihood = ll1 + ll2 + ll3 + ll4
		
		* compute BIC criterion *
		* note: the formula uses the total number of parameters in the earnings and CCPs equation.
		if `n_types' == 1 ///
		{
			gen double BIC = 410*ln(`N')-2*loglikelihood
		}
		if `n_types' == 2 ///
		{
			gen double BIC = 815*ln(`N')-2*loglikelihood
		}
		if `n_types' == 3 ///
		{
			gen double BIC = 1220*ln(`N')-2*loglikelihood
		}
		if `n_types' == 4 ///
		{
			gen double BIC = 1625*ln(`N')-2*loglikelihood
		}
		if `n_types' == 5 ///
		{
			gen double BIC = 2030*ln(`N')-2*loglikelihood
		}
		if `n_types' == 6 ///
		{
			gen double BIC = 2435*ln(`N')-2*loglikelihood
		}
		
		* normalize log likelihood by number of individuals *
		replace loglikelihood = loglikelihood/(`N')
		
		* keep track of iterations *
		gen iteration = `EM_loop'
		
		if `EM_loop' == 1 ///
		{
			keep iteration loglikelihood BIC
			order iteration loglikelihood BIC
			compress
			save "`datadir'gc_structural_llhood_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", replace
			local converged = 0
			if `n_types' == 1 ///
			{
				local converged = 1
			}
		}
		
		if `EM_loop' > 1 ///
		{
			append using "`datadir'gc_structural_llhood_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
			sort iteration
			keep iteration loglikelihood BIC
			order iteration loglikelihood BIC
			compress
			save "`datadir'gc_structural_llhood_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", replace
			gen tresh = .
			replace tresh = abs(loglikelihood[_n] - loglikelihood[_n-1]) if _n == `EM_loop'
			sum tresh
			local max_tresh = `r(max)'
			disp "Change in loglikelihood = `max_tresh'"
			sum loglikelihood if iteration == `EM_loop'
			if `max_tresh' < 0.0001 ///
			{
				local converged = 1
			}
		}
		
		clear
		disp "***** Time is now $S_TIME *****"
		disp "***** STEP 2.5: CALCULATE THE LOG LIKELIHOOD (COMPLETED) *****"
		
		******************************************
		* STEP 6: UPDATE POSTERIOR PROBABILITIES *
		******************************************
		disp "***** STEP 2.6: UPDATE POSTERIOR PROBABILITIES *****"
		disp "***** Time is now $S_TIME *****"
		local datadir T:\${data_folder}\
		if `converged' == 0 ///
		{
			use "`datadir'gc_analyze_me_tmp_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", clear
			rename state state_index
			rename tax_yr tax_yr_index
			merge 1:1 pid_int state_index tax_yr_index using "`datadir'gc_structural_probY_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
			drop if _merge == 2
			drop _merge

			keep pid pid_int state_index tax_yr_index ln_y action Laction age *entry* ///
			exper_11 exper_12 exper_13 ///
			exper_20 exper_30 ///
			probY
			
			foreach var in "Laction" "age" ///
			"action_entry" "res_city_size_entry" "inc_entry_cat" "busiden_entry_cat" ///
			"exper_11" "exper_12" "exper_13" ///
			"exper_20" "exper_30" ///
			{
				rename `var' `var'_index
			}
		
			merge m:1 state_index action_entry_index res_city_size_entry_index inc_entry_cat_index busiden_entry_cat_index using "`datadir'gc_structural_Pi_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
			drop if _merge == 2
			drop _merge
			
			merge m:1 state_index Laction_index age_index ///
			exper_11_index exper_12_index exper_13_index ///
			exper_20_index exper_30_index ///
			using "`datadir'gc_structural_CCPs_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta"
			drop if _merge == 2
			drop _merge
			
			gen aCCP = .
			foreach act in ///
			"11" "12" "13" ///
			"20" "30" "90" ///
			{
				replace aCCP = CCP`act' if action == `act'
			}
			drop CCP*

			keep pid_int state_index tax_yr_index Pi probY aCCP
			order pid_int state_index tax_yr_index Pi probY aCCP
			rename tax_yr_index tax_yr
			rename state_index state
			sort pid_int tax_yr state
			by pid_int: egen entry_yr = min(tax_yr)
			by pid_int: egen exit_yr = max(tax_yr)
			reshape wide probY aCCP, i(pid_int entry_yr exit_yr state Pi) j(tax_yr)
			forvalues t = 2001(1)2011 ///
			{
				gen double num`t' = probY`t'*aCCP`t'
				replace num`t' = 1 if missing(num`t')
			}
			gen double numerateur = .
			replace numerateur = num2001*num2002*num2003*num2004*num2005*num2006*num2007*num2008*num2009*num2010*num2011*Pi
			keep pid_int state numerateur
			reshape wide numerateur, i(pid_int) j(state)
			egen double denominateur = rowtotal(numerateur*), missing
			forvalues s = 1(1)`n_types' ///
			{
				gen double qnew`s' = numerateur`s'/denominateur
			}
			keep pid_int qnew*
			reshape long qnew, i(pid_int) j(state)
			rename qnew q
			rename state state_index
			sort pid_int state_index
			duplicates drop

			compress
			save "`datadir'gc_structural_Qs_a`a'_v`v'_d`d'_q`n_types'_m`y_dgp'.dta", replace
			
			disp "***** Time is now $S_TIME *****"
			disp "***** STEP 2.6: UPDATE POSTERIOR PROBABILITIES (COMPLETED) *****"
		}
		disp "***** Time is now $S_TIME *****"
		disp "***** This is the end of loop number `EM_loop' of the EM algorithm *****"
		local EM_loop = `EM_loop'+1
	}
	disp "***** STEP 2: structural estimation - first stage (COMPLETED) *****"
}
disp "***** Finished processing STEP 2 *****"
endtime

}

********************************************
* STEP X: delete all intermediate datasets *
********************************************
disp "***** Started processing STEP X *****"
starttime
if 1 == `gateX' ///
{
	disp "***** STEP X: delete all intermediate datasets *****"
	local datadir T:\${data_folder}\
	
	disp "***** STEP X: delete all intermediate datasets (COMPLETED) *****"
}
disp "***** Finished processing STEP X *****"
endtime

********
* EXIT *
********
clear all
log close
